#Installing Packages (nothing is installed in this section)
#VIEWING DATA
# Print the working directory and list its contents so the relative path
# "NY2024.csv" used below can be checked against the recorded output.
getwd()
## [1] "/Users/elragaby/Desktop/Data Science"
dir()
## [1] "coor" "Data Science.Rproj"
## [3] "Heart2022.csv" "HeartRData.R"
## [5] "NBAPlayersData.csv" "neighborhood_boundaries.geojson"
## [7] "NY Airbnb Listings.Rmd" "NY-Airbnb-Listings.Rmd"
## [9] "NY.R" "NY2024.csv"
## [11] "USACOLLEGES.csv"
# Comma-separated file whose first row holds the column names.
NYData <- read.csv(file = "NY2024.csv", header = TRUE, sep = ",")
# Preview the first six rows.
head(x = NYData, n = 6L)
## id name
## 1 1.312228e+06 Rental unit in Brooklyn · ★5.0 · 1 bedroom
## 2 4.527754e+07 Rental unit in New York · ★4.67 · 2 bedrooms · 1 bed · 1 bath
## 3 9.713540e+17 Rental unit in New York · ★4.17 · 1 bedroom · 2 beds · 1 bath
## 4 3.857863e+06 Rental unit in New York · ★4.64 · 1 bedroom · 1 private bath
## 5 4.089661e+07 Condo in New York · ★4.91 · Studio · 1 bed · 1 bath
## 6 4.958498e+07 Rental unit in New York · ★5.0 · 1 bedroom · 1 bath
## host_id host_name borough neighborhood latitude longitude
## 1 7130382 Walter Brooklyn Clinton Hill 40.68371 -73.96461
## 2 51501835 Jeniffer Manhattan Hell's Kitchen 40.76661 -73.98810
## 3 528871354 Joshua Manhattan Chelsea 40.75076 -73.99461
## 4 19902271 John And Catherine Manhattan Washington Heights 40.83560 -73.94250
## 5 61391963 Stay With Vibe Manhattan Murray Hill 40.75112 -73.97860
## 6 51501835 Jeniffer Manhattan Hell's Kitchen 40.75995 -73.99296
## room_type price minimum_nights number_of_reviews last_review
## 1 Private room 55 30 3 12/20/15
## 2 Entire home/apt 144 30 9 5/1/23
## 3 Entire home/apt 187 2 6 12/18/23
## 4 Private room 120 30 156 9/17/23
## 5 Entire home/apt 85 30 11 12/3/23
## 6 Entire home/apt 115 30 5 7/29/23
## reviews_per_month calculated_host_listings_count availability_365
## 1 0.03 1 0
## 2 0.24 139 364
## 3 1.67 1 343
## 4 1.38 2 363
## 5 0.24 133 335
## 6 0.16 139 276
## number_of_reviews_ltm license rating bedrooms beds baths
## 1 0 No License 5 1 1 Not specified
## 2 2 No License 4.67 2 1 1
## 3 6 Exempt 4.17 1 2 1
## 4 12 No License 4.64 1 1 1
## 5 3 No License 4.91 Studio 1 1
## 6 2 No License 5 1 1 1
#EXPLORING DATA
## [1] 20758
## 'data.frame': 20758 obs. of 22 variables:
## $ id : num 1.31e+06 4.53e+07 9.71e+17 3.86e+06 4.09e+07 ...
## $ name : chr "Rental unit in Brooklyn · ★5.0 · 1 bedroom" "Rental unit in New York · ★4.67 · 2 bedrooms · 1 bed · 1 bath" "Rental unit in New York · ★4.17 · 1 bedroom · 2 beds · 1 bath" "Rental unit in New York · ★4.64 · 1 bedroom · 1 private bath" ...
## $ host_id : int 7130382 51501835 528871354 19902271 61391963 51501835 51501835 51501835 2526182 14251313 ...
## $ host_name : chr "Walter" "Jeniffer" "Joshua" "John And Catherine" ...
## $ borough : chr "Brooklyn" "Manhattan" "Manhattan" "Manhattan" ...
## $ neighborhood : chr "Clinton Hill" "Hell's Kitchen" "Chelsea" "Washington Heights" ...
## $ latitude : num 40.7 40.8 40.8 40.8 40.8 ...
## $ longitude : num -74 -74 -74 -73.9 -74 ...
## $ room_type : chr "Private room" "Entire home/apt" "Entire home/apt" "Private room" ...
## $ price : int 55 144 187 120 85 115 105 130 90 292 ...
## $ minimum_nights : int 30 30 2 30 30 30 30 30 30 30 ...
## $ number_of_reviews : int 3 9 6 156 11 5 3 10 19 12 ...
## $ last_review : chr "12/20/15" "5/1/23" "12/18/23" "9/17/23" ...
## $ reviews_per_month : num 0.03 0.24 1.67 1.38 0.24 0.16 0.1 0.26 0.24 1.71 ...
## $ calculated_host_listings_count: int 1 139 1 2 133 139 139 139 2 1 ...
## $ availability_365 : int 0 364 343 363 335 276 364 295 5 365 ...
## $ number_of_reviews_ltm : int 0 2 6 12 3 2 0 2 2 12 ...
## $ license : chr "No License" "No License" "Exempt" "No License" ...
## $ rating : chr "5" "4.67" "4.17" "4.64" ...
## $ bedrooms : chr "1" "2" "1" "1" ...
## $ beds : int 1 1 2 1 1 1 1 2 1 1 ...
## $ baths : chr "Not specified" "1" "1" "1" ...
## id name
## 0 0
## host_id host_name
## 0 0
## borough neighborhood
## 0 0
## latitude longitude
## 0 0
## room_type price
## 0 0
## minimum_nights number_of_reviews
## 0 0
## last_review reviews_per_month
## 0 0
## calculated_host_listings_count availability_365
## 0 0
## number_of_reviews_ltm license
## 0 0
## rating bedrooms
## 0 0
## beds baths
## 0 0
## name host_id
## 1 Rental unit in Brooklyn · ★5.0 · 1 bedroom 7130382
## 2 Rental unit in New York · ★4.67 · 2 bedrooms · 1 bed · 1 bath 51501835
## 3 Rental unit in New York · ★4.17 · 1 bedroom · 2 beds · 1 bath 528871354
## borough neighborhood latitude longitude room_type price
## 1 Brooklyn Clinton Hill 40.68371 -73.96461 Private room 55
## 2 Manhattan Hell's Kitchen 40.76661 -73.98810 Entire home/apt 144
## 3 Manhattan Chelsea 40.75076 -73.99461 Entire home/apt 187
## minimum_nights number_of_reviews last_review reviews_per_month
## 1 30 3 12/20/15 0.03
## 2 30 9 5/1/23 0.24
## 3 2 6 12/18/23 1.67
## calculated_host_listings_count availability_365 number_of_reviews_ltm
## 1 1 0 0
## 2 139 364 2
## 3 1 343 6
## license rating bedrooms beds baths
## 1 No License 5 1 1 Not specified
## 2 No License 4.67 2 1 1
## 3 Exempt 4.17 1 2 1
## NULL
## NULL
## [1] "Private room" "Entire home/apt" "Hotel room" "Shared room"
## Host_ID Listings
## 1 162280872 146
## 2 51501835 129
## 3 61391963 113
## 4 107434423 107
## 5 19303369 87
## 6 137358866 81
## 7 219517861 81
## 8 200239515 70
## 9 120762452 69
## 10 204704622 62
# Visualizing Data
# Bar chart of the hosts in `top_host_df`, one bar per host, ordered from the
# largest listing count to the smallest.
viz_1 <- ggplot(
  data = top_host_df,
  mapping = aes(
    x = reorder(Host_ID, -Listings),  # negated so the tallest bar comes first
    y = Listings,
    fill = Listings
  )
) +
  geom_col() +  # bar heights are taken directly from `Listings`
  scale_fill_gradient(low = "lightblue", high = "darkblue") +
  labs(
    title = "Hosts with the most listings in NYC",
    x = "Host IDs",
    y = "Count of listings"
  ) +
  # Tilt the host IDs by 45 degrees and right-align them under their bars.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
print(viz_1)
# Density and distribution of prices for each borough
# Only listings with price < 500 are plotted (presumably to keep a few very
# expensive listings from stretching the price axis).
sub_6 <- subset(NYData, price < 500)
# The plot groups by the `borough` column, so the title and axis say "Borough";
# `neighborhood` is a separate, finer-grained column in this data set.
viz_2 <- ggplot(sub_6, aes(x = borough, y = price, fill = borough)) +
  geom_violin() +
  labs(
    title = "Density and Distribution of Prices for Each Borough",
    x = "Borough",
    y = "Price Per Night"
  ) +
  # The x axis already names every borough, so the fill legend is hidden.
  theme(
    legend.position = "none",
    axis.title.x = element_text(face = "bold"),
    axis.title.y = element_text(face = "bold")
  )
print(viz_2)
# Box plot of prices for each borough
# `top_neighbourhoods` holds the names of (at most) the ten boroughs with the
# most listings; it is computed from the `borough` column, not `neighborhood`.
top_neighbourhoods <- head(
  names(sort(table(NYData$borough), decreasing = TRUE)),
  10
)
# NOTE: this overwrites the `sub_6` created for the violin plot.
sub_6 <- subset(NYData, price < 500 & borough %in% top_neighbourhoods)
# Labels name the variable that is actually plotted (borough).
viz_boxplot <- ggplot(sub_6, aes(x = borough, y = price, fill = borough)) +
  geom_boxplot() +
  labs(
    title = "Distribution of Prices for Each Borough",
    x = "Borough",
    y = "Price Per Night"
  ) +
  # The x axis already names every borough, so the fill legend is hidden.
  theme(
    legend.position = "none",
    axis.title.x = element_text(face = "bold"),
    axis.title.y = element_text(face = "bold")
  )
print(viz_boxplot)
# Visualizing the distribution of room types
# Set the repr.plot.* size options (width 12, height 6).
options(repr.plot.width = 12, repr.plot.height = 6)
# Convert room_type to a factor whose levels follow the order in which the
# values first appear; the colours below are assigned to levels in that order.
NYData[["room_type"]] <- factor(
  NYData[["room_type"]],
  levels = unique(NYData[["room_type"]])
)
custom_colors <- c("blue", "green", "orange", "red")
# One cluster of bars per borough, one bar per room type within the cluster.
viz_grouped_bar <- ggplot(
  data = NYData,
  mapping = aes(x = borough, fill = room_type)
) +
  geom_bar(position = "dodge", width = 0.7) +
  scale_fill_manual(values = custom_colors) +
  labs(
    title = "Distribution of Room Types per Neighborhood Group",
    x = "Neighborhood Group",
    y = "Number of Listings"
  ) +
  theme_minimal()
print(viz_grouped_bar)
# Creating a map of New York and its neighborhoods
library(sf)
## Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE
library(leaflet)
library(leaflet.extras)
# Read the neighborhood polygons from the "coor" data source
# (relative to the working directory).
coor <- st_read(dsn = "coor")
## Reading layer `coor' from data source `/Users/elragaby/Desktop/Data Science/coor' using driver `GeoJSON'
## Simple feature collection with 310 features and 4 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: -74.25559 ymin: 40.49613 xmax: -73.70001 ymax: 40.91553
## Geodetic CRS: WGS 84
# Inspect the columns of the imported object.
str(object = coor)
## Classes 'sf' and 'data.frame': 310 obs. of 5 variables:
## $ neighborhood: chr "Allerton" "Alley Pond Park" "Arden Heights" "Arlington" ...
## $ boroughCode : chr "2" "4" "5" "5" ...
## $ borough : chr "Bronx" "Queens" "Staten Island" "Staten Island" ...
## $ X.id : chr "http://nyc.pediacities.com/Resource/Neighborhood/Allerton" "http://nyc.pediacities.com/Resource/Neighborhood/Alley_Pond_Park" "http://nyc.pediacities.com/Resource/Neighborhood/Arden_Heights" "http://nyc.pediacities.com/Resource/Neighborhood/Arlington" ...
## $ geometry :sfc_POLYGON of length 310; first list element: List of 1
## ..$ : num [1:16, 1:2] -73.8 -73.8 -73.9 -73.9 -73.9 ...
## ..- attr(*, "class")= chr [1:3] "XY" "POLYGON" "sfg"
## - attr(*, "sf_column")= chr "geometry"
## - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA
## ..- attr(*, "names")= chr [1:4] "neighborhood" "boroughCode" "borough" "X.id"
ny_airbnb <- read.csv("NY2024.csv")
# Calculating average price by neighborhood
feq <- aggregate(
  ny_airbnb$price,
  by = list(ny_airbnb$neighborhood),
  FUN = mean
)
names(feq) <- c("neighborhood", "average_price")
# Merging 'coor' and 'feq' on 'neighborhood'. all.x = TRUE keeps every polygon;
# neighborhoods with no match in 'feq' get an average_price of NA.
coor <- merge(coor, feq, by = "neighborhood", all.x = TRUE)
# Rounding 'average_price' to the nearest integer
coor$average_price <- round(coor$average_price)
# domain = NULL: no fixed range is set here; the palette scales to whatever
# values it is called with (the same average_price column for fill and legend).
color_scale <- colorNumeric(palette = "Reds", domain = NULL)
# Creating a leaflet map: polygons shaded by average price, with a legend.
map3 <- leaflet(coor) %>%
  setView(lng = -74.00, lat = 40.71, zoom = 10) %>%
  addProviderTiles("CartoDB.Positron") %>%
  addPolygons(
    fillColor = ~color_scale(average_price),
    weight = 1,
    color = "black",
    fillOpacity = 0.5,
    # Hover style: thicker outline and a more opaque fill. fillColor is left
    # out on purpose: the hover style is one shared style, not a per-polygon
    # value, so omitting it lets each polygon keep its own fill colour.
    highlightOptions = highlightOptions(
      weight = 3,
      fillOpacity = 0.8
    )
    # Optional hover label, currently disabled:
    # label = ~paste(neighborhood, ': '," $",average_price, "/ night")
  ) %>%
  addLegend(
    position = "bottomright",
    pal = color_scale,
    values = coor$average_price,
    title = "Average Price",
    opacity = 0.7
  )
map3
# Creating a map with all the AirBnB listings
library(sf)
library(leaflet)
library(leaflet.extras)
# Creating a simple spatial object: points built from the longitude/latitude
# columns, tagged with CRS 4326.
NYData_sf <- st_as_sf(NYData, coords = c("longitude", "latitude"), crs = 4326)
# Price palette. It is not applied to the pin markers below, because icon
# markers take no fill/stroke styling; it is kept for a circle-marker variant,
# e.g. addCircleMarkers(fillColor = ~colors_red(price), ...).
colors_red <- colorNumeric(palette = "Reds", domain = NULL)
# Creating leaflet map: clustered pin markers with a details popup per listing.
map <- leaflet(NYData_sf) %>%
  addProviderTiles("CartoDB.Voyager") %>%
  addMarkers(
    clusterOptions = markerClusterOptions(),
    group = "listings",
    # No path-style options (fillColor, fillOpacity, weight, color, radius)
    # are passed: they style circle markers and paths, not icon markers.
    popup = ~paste(
      "<strong>Price:</strong>", price, "<br>",
      "<strong>Room Type:</strong>", room_type, "<br>",
      "<strong>Minimum Nights:</strong>", minimum_nights, "<br>",
      "<strong>Bedrooms:</strong>", bedrooms, "<br>",
      "<strong>Beds:</strong>", beds, "<br>",
      "<strong>Baths:</strong>", baths
    )
  ) %>%
  # Overview mini map that the user can collapse.
  addMiniMap(toggleDisplay = TRUE)
map
# Combining Map 1 & 2
library(sf)
library(leaflet)
library(leaflet.extras)
# Re-read the neighborhood polygons from the "coor" data source, replacing
# any existing `coor` object.
coor <- st_read(dsn = "coor")
## Reading layer `coor' from data source `/Users/elragaby/Desktop/Data Science/coor' using driver `GeoJSON'
## Simple feature collection with 310 features and 4 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: -74.25559 ymin: 40.49613 xmax: -73.70001 ymax: 40.91553
## Geodetic CRS: WGS 84
# Inspect the columns of the imported object.
str(object = coor)
## Classes 'sf' and 'data.frame': 310 obs. of 5 variables:
## $ neighborhood: chr "Allerton" "Alley Pond Park" "Arden Heights" "Arlington" ...
## $ boroughCode : chr "2" "4" "5" "5" ...
## $ borough : chr "Bronx" "Queens" "Staten Island" "Staten Island" ...
## $ X.id : chr "http://nyc.pediacities.com/Resource/Neighborhood/Allerton" "http://nyc.pediacities.com/Resource/Neighborhood/Alley_Pond_Park" "http://nyc.pediacities.com/Resource/Neighborhood/Arden_Heights" "http://nyc.pediacities.com/Resource/Neighborhood/Arlington" ...
## $ geometry :sfc_POLYGON of length 310; first list element: List of 1
## ..$ : num [1:16, 1:2] -73.8 -73.8 -73.9 -73.9 -73.9 ...
## ..- attr(*, "class")= chr [1:3] "XY" "POLYGON" "sfg"
## - attr(*, "sf_column")= chr "geometry"
## - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA
## ..- attr(*, "names")= chr [1:4] "neighborhood" "boroughCode" "borough" "X.id"
ny_airbnb <- read.csv("NY2024.csv")
# Calculating average price by neighborhood
feq <- aggregate(
  ny_airbnb$price,
  by = list(ny_airbnb$neighborhood),
  FUN = mean
)
names(feq) <- c("neighborhood", "average_price")
# Merging 'coor' and 'feq' on 'neighborhood'. all.x = TRUE keeps every polygon;
# neighborhoods with no match in 'feq' get an average_price of NA.
coor <- merge(coor, feq, by = "neighborhood", all.x = TRUE)
# Rounding 'average_price' to the nearest integer
coor$average_price <- round(coor$average_price)
# domain = NULL: no fixed range is set here; the palette scales to whatever
# values it is called with (the same average_price column for fill and legend).
color_scale <- colorNumeric(palette = "Purples", domain = NULL)
# Creating a leaflet map: polygons shaded by average price, with a legend.
map <- leaflet(coor) %>%
  setView(lng = -74.00, lat = 40.71, zoom = 10) %>%
  addProviderTiles("CartoDB.Positron") %>%
  addPolygons(
    fillColor = ~color_scale(average_price),
    weight = 1,
    color = "black",
    fillOpacity = 0.5,
    # Hover style: thicker outline and a more opaque fill. fillColor is left
    # out on purpose: the hover style is one shared style, not a per-polygon
    # value, so omitting it lets each polygon keep its own fill colour.
    highlightOptions = highlightOptions(
      weight = 3,
      fillOpacity = 0.8
    )
    # Optional hover label, currently disabled:
    # label = ~paste(neighborhood, ': '," $",average_price, "/ night")
  ) %>%
  addLegend(
    position = "bottomright",
    pal = color_scale,
    values = coor$average_price,
    title = "Average Price",
    opacity = 0.7
  )
# Creating a simple spatial object: points built from the longitude/latitude
# columns, tagged with CRS 4326.
NYData_sf <- st_as_sf(NYData, coords = c("longitude", "latitude"), crs = 4326)
# Price palette. It is not applied to the pin markers below, because icon
# markers take no fill/stroke styling; it is kept for a circle-marker variant,
# e.g. addCircleMarkers(fillColor = ~colors_red(price), ...).
colors_red <- colorNumeric(palette = "Reds", domain = NULL)
# Adding markers to the existing map. `data = NYData_sf` makes the popup
# formula resolve against the listings rather than the map's own data.
map <- addMarkers(
  map,
  data = NYData_sf,
  clusterOptions = markerClusterOptions(),
  group = "listings",
  # No path-style options (fillColor, fillOpacity, weight, color, radius)
  # are passed: they style circle markers and paths, not icon markers.
  popup = ~paste(
    "<strong>Price:</strong>", price, "<br>",
    "<strong>Room Type:</strong>", room_type, "<br>",
    "<strong>Minimum Nights:</strong>", minimum_nights, "<br>",
    "<strong>Bedrooms:</strong>", bedrooms, "<br>",
    "<strong>Beds:</strong>", beds, "<br>",
    "<strong>Baths:</strong>", baths
  )
) %>%
  # Overview mini map that the user can collapse.
  addMiniMap(toggleDisplay = TRUE)
map